home *** CD-ROM | disk | FTP | other *** search
- /*********************************************************************/
- /* Reference: The C Programming Tutor, Leon A. Wortman, Thomas */
- /* O. Sidebottom. pp 195-221 */
- /* Modified April 1, 1985 - A. DeSouza */
- /*********************************************************************/
- /* include files */
- /*********************************************************************/
- #include <c:stdio.h>
- /*********************************************************************/
- /* definitions */
- /*********************************************************************/
- #define BOOL int
- #define DQUOTE '"'
- #define EQUALS 0
- #define FALSE 0
- #define FIRSTGREATER 1
- #define FIRSTLESS -1
- #define HTSIZE 1009 /* must be prime */
- #define MAXSTRLEN 255
- #define MAXWORDSIZE 20
- #define NULL 0
- #define SLASH '/'
- #define SQUOTE '\''
- #define STAR '*'
- #define TRUE 1
- #define VOID int
- #define MAXLNS 64 /* Maximum lines per page for Xerox */
- /*********************************************************************/
- /* code macros */
- /*********************************************************************/
- #define PutBack(Ch) {PutBackChar = Ch; }
-
- /*********************************************************************/
- /* typedefs */
- /*********************************************************************/
- typedef struct rType {
- int Reference; /* line number */
- struct rType *Link; /* link to next reference */
- }
- RefType, *RefPtr;
-
- typedef struct tType {
- char *Value; /* pointer to token */
- RefPtr OccurList; /* line number list */
- }
- TokenType, *TokenPtr;
-
- /*********************************************************************/
- /* eternal functions */
- /*********************************************************************/
- extern char *malloc();
- extern RefPtr MakeOccurrence();
-
- /*********************************************************************/
- /* global variables */
- /*********************************************************************/
- int CurLineNo = 1; /* Line being read. */
- TokenType HashTable [HTSIZE];
- int PutBackChar = '\0'; /* Character put back after input */
- int PageNo = 1;
- int LnCount = 5; /* Line count includes page break */
- char FlagPg = FALSE;
- FILE *FileOut; /* Output file */
-
- main (argc, argv)
- int argc;
- char *argv[];
- { /* main */
- int Counter;
- char CurToken [MAXSTRLEN];
- char c;
- char *ptrin, *ptrout;
- FILE *InFile;
-
- if (!(InFile = fopen(argv[1], "r"))) {
- fprintf(stderr,"Can't open input file %s.\n", argv[1]);
- exit(0);
- }
-
- printf (" Output for Xerox 2700 Printer (Y/N): ");
- c = toupper(getchar());
- if (c == 'Y') FlagPg = TRUE;
-
- for (ptrout=CurToken, ptrin=argv[1]; *ptrin; ptrin++, ptrout++)
- {
- if (*ptrin == '.' || *ptrin == ' ')
- break;
- *ptrout = *ptrin;
- }
- *ptrout = '\0';
- strcat (CurToken, ".lst");
- FileOut = fopen(CurToken, "w");
-
- /* Initialize the hash table.
- */
- for (Counter = 0; Counter < HTSIZE; Counter++) {
- HashTable[Counter].Value = NULL;
- HashTable[Counter].OccurList = NULL;
- }
-
- /* Write the first line numbr to the output stream.
- */
- fprintf(FileOut, " \t \t \t \t \t Page %d ", PageNo);
- PageNo++;
- fprintf(FileOut, "\n%4d ", CurLineNo);
-
- /* Read and store each token.
- */
- while (GetNxtToken(CurToken, InFile)) {
- if (InsertToken(CurToken)) {
- fprintf(stderr, "Hash table size exceeded.\n");
- break;
- }
- }
- fprintf(FileOut, " End-of-file.\n\n");
-
- /* sort the tokens alphabetically.
- */
- SortTokens(HTSIZE);
-
- PrintTable();
- } /* main */
-
- VOID AddOccurrence(HeadPtr)
- RefPtr HeadPtr;
- /* AddOccurrence adds a new line occurrence to the
- * end of the list that HeadPtr begins.
- */
- { /* AddOccurrence */
- RefPtr CurPtr;
- RefPtr NewPtr;
-
- /* Find the end of the list by starting at
- * the beginning and advancing through the list
- * until we find the end. */
- for (CurPtr = HeadPtr; CurPtr->Link != NULL;
- CurPtr = CurPtr->Link)
- ;
-
- CurPtr->Link = NewPtr = (RefPtr)malloc(sizeof(RefType));
- if (NewPtr == NULL) {
- fprintf("Out of Memory in AddOccurrence.");
- exit(0);
- }
- NewPtr->Reference = CurLineNo;
- NewPtr->Link = (RefPtr)NULL;
- } /* AddOccurrence */
-
- char *Copy(OldString)
- char *OldString;
- /* Copy makes a copy of OldString and returns the
- * address of the copy. */
- { /* Copy */
- char *NewString;
-
- /* Allocate a string able to hold the length of the
- * string plus one for the terminator. */
- NewString = malloc(strlen(OldString) + 1);
-
- /* Copy the string and return a pointer to it.
- */
- strcpy(NewString, OldString);
- return (NewString);
- } /* Copy */
-
- int GenerateNewIndex(OriginalKey, ProbeNumber)
- int OriginalKey;
- int ProbeNumber;
-
- /* GenerateNewIndex takes the current hash table index and
- * generates a new index for collision resolution.
- */
- { /* GenerateNewIndex */
- return ((OriginalKey + ProbeNumber * ProbeNumber) % HTSIZE);
- } /* GenerateNewIndex */
-
- int GetNxtChar(InputFile)
- FILE *InputFile;
- /* GetNxtChar returns the next non-comment
- * a non-string character from Inputfile.
- *
- * At least one character is read from InputFile. If
- * the character could start a comment the next character
- * is checked. If the slash-star of a comment
- * is read the reading of characters continues until the end
- * of the comment is found. If a single or double quote
- * is read the reading of characters continues until
- * the closing mark is found. When end-of-file is encountered
- * EOF is returned. GetNxtChar therefore never returns
- * characters inside comments or quotes.
- */
- { /* GetNxtChar */
- int CheckChar;
- int NewChar;
- int TempChar;
-
- /* If end-of-file is found, return immediately.
- */
- if ((NewChar = GetRawChar(InputFile)) == EOF)
- return(EOF);
-
- /* If a single or double quote is found, process the string.
- */
- if (NewChar == SQUOTE || NewChar == DQUOTE) {
- CheckChar = NewChar;
- /* Continue reading until the matching quote character
- * is found.
- */
- while ((NewChar = GetRawChar(InputFile)) != CheckChar &&
- NewChar != EOF);
- /* The terminating character has now been read.
- * Read one more and return it.
- */
- NewChar = GetRawChar(InputFile);
- }
- /* Next handle comment processing. If SLASH is read,
- * check to see if the next characte is a STAR. If it is,
- * continue reading until the matching STAR-SLASH is found.
- */
- else if (NewChar == SLASH) {
- if ((TempChar = GetRawChar(InputFile)) != STAR) {
- /* Not a comment; put back the character.
- */
- PutBack(TempChar);
- }
- else
- /* Here's a comment. Search for the end.
- */
- while (TRUE) {
- /* Read characters until a STAR is found.
- */
- while ((NewChar = GetRawChar(InputFile)) != STAR &&
- NewChar != EOF);
-
- if ((NewChar = GetRawChar(InputFile)) == SLASH) {
- /* The end of the comment is found.
- */
- NewChar = GetRawChar(InputFile);
- break;
- }
- else
- /* Not end of comment. Put the character back.
- */
- PutBack(NewChar);
- /* If end-of-file has been found, stop processing.
- */
- if (NewChar == EOF)
- break;
- }
- }
- return (NewChar);
- } /* GetNxtChar */
-
- BOOL GetNxtToken(Buffer, InputFile)
- char *Buffer;
- FILE *InputFile;
- /* GetNxtToken returns in Buffer the next valid C token in the
- * the program. */
- { /* GetNxtToken */
- int CurChar;
-
- /* Read characters from Inputfile until starting letter is
- * found. Stop when find a valid letter or end-of-file.
- */
- while (!IsStartingLetter(CurChar = GetNxtChar(InputFile)) &&
- CurChar != EOF)
- ;
-
- /* Insert the first character.
- */
- *Buffer++ = CurChar;
-
- /* Read and accumulate characters in Buffer while they are
- * valid letters for a token.
- */
- while (IsTokenLetter(CurChar = GetNxtChar(InputFile)) &&
- CurChar != EOF)
- *Buffer++ = CurChar;
-
- /* Put back the last character we read.
- */
- if (CurChar != EOF)
- PutBack(CurChar);
-
- /* Terminate the token.
- */
- *Buffer = '\0';
-
- /* Return end-of-file status.
- */
- return ((CurChar == EOF) ? FALSE : TRUE);
- } /* GetNxtToken */
-
- int GetRawChar(InputFile)
- FILE *InputFile;
- /* GetRawChar reads the next character from InputFile and
- * returns it. If a newline is read, it increments
- * CurLineNo. If end-of-file is read, it returns EOF.
- */
- { /* GetRawChar */
- int NextChar;
- /* Check PutBackChar to see if the next character has
- * already been read. If so, process it and set PutBackChar
- * to the null character.
- */
- if (PutBackChar != '\0') {
- NextChar = PutBackChar;
- PutBackChar = '\0';
- }
- else {
- /* Echo the character read to the output stream.
- * If a newline is read, increment the line count.
- */
- if ((NextChar = getc(InputFile)) == '\n') {
- if (FlagPg)
- WrtEndPg();
- fprintf(FileOut, "\n%4d ", ++CurLineNo);
- }
- else
- fputc(NextChar, FileOut);
- }
-
- if (NextChar == EOF)
- return(EOF);
-
- return(NextChar);
- } /* GetRawChar */
-
- VOID WrtEndPg()
- /* If end of page reached put blank line to make a page break
- */
- { /* WrtEndPg */
- LnCount++;
- if ((LnCount % MAXLNS) == 0) {
- fprintf(FileOut, " \n \n \n \n");
- fprintf(FileOut, " \t\t\t\t\t\t\t\t Page %d ", PageNo);
- PageNo++;
- LnCount += 4;
- }
- } /* WrtEndPg */
-
-
- int Hash(Word)
- char *Word;
- /* Hash returns the HTIndex of the Word passed, or an
- * appropriate HTIndex for the word. If HashTable is
- * full, Hash returns -1 */
- { /* Hash */
- int HTIndex;
- int IdLen;
- int InitHTIndex;
- int ProbeCounter = 0;
-
- IdLen = strlen(Word);
- if (IdLen == 0)
- printf("Hash: Word of no length\n");
- HTIndex = InitHTIndex = TransformId(Word);
- if (HashTable[HTIndex].Value == NULL)
- ; /* NULL - We've got it. */
- else /* have we found the correct indes? */
- if (strcmp(Word, HashTable[HTIndex].Value) == EQUALS)
- ; /* DONE: A direct hit! */
- else /* Collision - generate new indices */
- for (ProbeCounter = 0; ProbeCounter < (HTSIZE / 2);
- ProbeCounter++) {
- HTIndex = GenerateNewIndex(InitHTIndex, ProbeCounter);
- if (HashTable[HTIndex].Value == NULL)
- break; /* We've got it! */
- else if (strcmp(Word, HashTable[HTIndex].Value)
- == EQUALS)
- break; /* We've got it! */
- }
- if (ProbeCounter >= (HTSIZE / 2))
- return(-1);
- return(HTIndex);
- } /* Hash */
-
- BOOL InsertToken(Token)
- char *Token;
- /* InsertToken inserts the token into the hash table if it is
- * new. If the token is previously known, it adds the line
- * number to the occurrence list. It then returns FALSE.
- * If the end-of-hash table is reached, TRUE is returned.
- */
- { /* InsertToken */
- int HTIndex;
-
- HTIndex = Hash(Token);
- if (HTIndex == -1)
- return (TRUE);
- if (HashTable[HTIndex].Value == NULL) {
- HashTable[HTIndex].Value = Copy(Token);
- HashTable[HTIndex].OccurList = MakeOccurrence();
- }
- else
- AddOccurrence(HashTable[HTIndex].OccurList);
-
- return (FALSE);
- } /* InsertToken */
-
- BOOL IsStartingLetter(Ch)
- char Ch;
- /* IsStartingLetter returns TRUE if Ch is a valid character
- * to begin a C token.
- */
- { /*IsStartingLetter */
- return ((isalpha(Ch) || Ch == '_') ? TRUE : FALSE);
- } /*IsStartingLetter */
-
- BOOL IsTokenLetter(Ch)
- char Ch;
- /* IsTokenLetter returns TRUE if Ch is a valid character
- * inside a C token.
- */
- { /* IsTokenLetter */
- return ((isalpha(Ch) || isdigit(Ch) || Ch == '_') ? TRUE : FALSE);
- } /* IsTokenLetter */
-
- RefPtr MakeOccurrence()
- /* MakeOccurrence creates the first entry of the line occurrence
- * list. It creates the next node in the list, inserts CurLineNO
- * and sets Link to NULL in preparation for the next entry.
- */
- { /* MakeOccurrence */
- RefPtr NewNode;
-
- if ((NewNode = (RefPtr)malloc(sizeof(RefType))) == NULL) {
- fprintf("Out of memory in MakeOccurrence.");
- exit(0);
- }
- NewNode->Reference = CurLineNo;
- NewNode->Link = (RefPtr)NULL;
-
- return(NewNode);
- } /* MakeOccurrence */
-
- int NameCompare(First, Second)
- int First;
- int Second;
- /* NameCompare compares the two entries in HashTable referenced
- * by the indices First and Second. It returns FIRSTLESS if
- * First < Second; EQUALS if the equal;
- * FIRSTGREATER if First > Second.
- */
- { /* NameCompare */
- /* If the first entry has no value, the first is less.
- */
- if (HashTable[First].Value == NULL)
- return(FIRSTLESS);
-
- /* Then if the second has no value, the first is greater.
- */
- if (HashTable[Second].Value == NULL)
- return(FIRSTGREATER);
-
- /* Otherwise return the comparison from strcmp.
- */
- return (strcmp(HashTable[First].Value, HashTable[Second].Value));
- } /* NameCompare */
-
- VOID PrintTable()
- /* PrintTable prints the list of identifiers and line occurrences.
- */
- { /* PrintTable */
- RefPtr ListPtr;
- int NumOnLine;
- int TokenCounter;
-
- LnCount ++;
- for (TokenCounter = 0; TokenCounter < HTSIZE;
- TokenCounter++) {
- if (HashTable[TokenCounter].Value) {
- if (FlagPg)
- WrtEndPg();
- fprintf(FileOut, "\n %-20s ", HashTable[TokenCounter].Value);
- for (ListPtr = HashTable[TokenCounter].OccurList,
- NumOnLine = 0; ListPtr != NULL;
- ListPtr = ListPtr->Link, NumOnLine++) {
- if (NumOnLine == 10) {
- fprintf("FileOut, \n ");
- if (FlagPg)
- WrtEndPg();
- NumOnLine = 0;
- }
- fprintf(FileOut, "%3d ", ListPtr->Reference);
- }
- }
- }
- fprintf(FileOut, "\n");
- } /* PrintTable */
-
- VOID SortTokens(SortNumber)
- int SortNumber;
- /* SortTokens sorts the tokens into alphabetical order
- * using a Shell sort.
- */
- { /* SortTokens */
- TokenType Exchange;
- int BaseCounter;
- int CurCounter;
- int CurGapCounter;
- int Gap;
- int LastInBuf;
-
- LastInBuf = SortNumber;
- /* Look through all Gaps starting with a gap half the
- * size of the list. Each time reduce the size of the
- * gap by dividing by two.
- */
- for (Gap = LastInBuf / 2; Gap > 0; Gap /=2)
-
- /* BaseCounter moves between the Gap-th element and
- * the last element in the list. The sort using the
- * current Gap runs until I is the LastInBuf-th
- * element in the list.
- */
- for (BaseCounter = Gap; BaseCounter < LastInBuf; BaseCounter++)
-
- /* For an BaseCounter we compare the CurCounter-th and
- * the CurGapCounter-th elements in the list. CurCounter
- * and CurGapCounter are always separated by Gap.
- */
- for (CurCounter = BaseCounter - Gap; CurCounter >= 0;
- CurCounter -= Gap) {
- CurGapCounter = CurCounter + Gap;
-
- /* If the two elements compare correctly, stop.
- */
- if (NameCompare (CurCounter, CurGapCounter) < EQUALS)
- break;
-
- /* Otherwise, exchange the elements.
- */
- Exchange.Value =
- HashTable[CurCounter].Value;
- Exchange.OccurList =
- HashTable[CurCounter].OccurList;
- HashTable[CurCounter].Value =
- HashTable[CurGapCounter].Value;
- HashTable[CurCounter].OccurList =
- HashTable[CurGapCounter].OccurList;
- HashTable[CurGapCounter].Value =
- Exchange.Value;
- HashTable[CurGapCounter].OccurList =
- Exchange.OccurList;
- }
- } /* SortTokens */
-
- int TransformId(Word)
- char Word[];
- /* TransformId converts the identifier into an integer
- * within the index range of HashTable
- */
- { /* transformId */
- int Term;
- int WordIndex;
-
- Term = 0;
- for (WordIndex = strlen(Word) - 1; WordIndex > -1;
- WordIndex--)
- Term = (257 * Term) + Word[WordIndex];
- Term = (Term < 0) ? -Term : Term;
- return (Term % HTSIZE);
- } /* transformId */